#include<unistd.h>

#include <math.h>
#include <ctype.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <pthread.h>
#include <malloc.h>
#include <sys/stat.h>

#include "samtools-1.3/sam.h"
#include "samtools-1.3/sam_opts.h"
#include "samtools-1.3/htslib-1.3/htslib/faidx.h"

static  int view_mispair(char *file_input,int read_length,char *file_output,char * file_prefix ) ;

typedef  struct {
    int SnvCounts ,InsertCounts,SclipCounts ,UnmCounts;
} read1_percycle_t;

static void *ckalloc(size_t num, size_t size)
{
    void *p;
    if((p=calloc(num,size)) == NULL)
    {
        fprintf(stderr, "ERROR: Fail to alloc memory!\n");
        abort();
    }
    return p;
}
static char *fmt_pfx(char *prefix)
{
    if (strlen(prefix) == 0)
        return "";
    else if (*(prefix + strlen(prefix) - 1) == '.')
        return prefix;
    else
        return strcat(prefix, ".");
}

static FILE *ckopen(const char *file, const char *mode)
{
    FILE *fp;
    if((fp=fopen(file,mode)) == NULL)
    {
        fprintf(stderr, "ERROR: Fail to open file '%s'\n", file);
        abort();
    }
    return fp;
}


static int usage(char *str){

//    fprintf(stderr, "\033[2m \033[0m Program \033[2m \033[0m %-40s\033[2m \033[0m\n", str);
//    fprintf(stderr, "\033[2m \033[0m           -m  sorted bam file (required)     \033[2m \033[0m\n");
//    fprintf(stderr, "\033[2m \033[0m           -r  reference fasta file (required)   \033[2m \033[0m\n");
//    fprintf(stderr, "\033[2m \033[0m           -n  read length (required)   \033[2m \033[0m\n");
//    fprintf(stderr, "\033[2m \033[0m           -o  output directory (default: cwd)   \033[2m \033[0m\n");
//    fprintf(stderr, "\033[2m \033[0m           -p  output file prefix (required)   \033[2m \033[0m\n");
    fprintf(stderr,
            "Usage: %s  -m <bam> -r <ref> -n <read length> [-o] [-p]\n"
                    "Options:\n"
                    "  -m       input BAM (sorted) \n"
                    "  -r       Reference sequence FASTA FILE \n"
                    "  -n       read length [76] \n"
                    "  -o       output directory [.]\n"
                    "  -p       output file name prefix  \n",str);

    return 1;
}

//view
static  int view_mispair(char *file_input,int read_length,char *file_output,char * file_prefix ){
    char *z  = (char *) ckalloc(4096, sizeof(char));
    char *file ;
    FILE *fp;
    file = (char *)ckalloc(4096, sizeof(char));
    sprintf(file, "%s/%s_tmp.r", file_output, fmt_pfx(file_prefix));
    fp = ckopen(file, "w");
    fprintf(fp,
            "png(file='%s/%s_mispair.png',width = 1000, height = 500, units ='px',pointsize = 12,bg = \"white\", res = NA);"
             "par(mar=c(4,4,3,3));"
             "fp <- read.table(file=\"%s\",sep=\"\\t\",stringsAsFactors = FALSE,header = TRUE,check.names = FALSE);"
             "readlen =%d;"
    //         "ym <- ceiling( max(fp$R1_UnmRate,fp$R1_ScRate,fp$R1_InsRate,fp$R1_SnvRate,fp$R1_TotalRate));"
     //        "ym2 <- ceiling(max(fp$R2_UnmRate,fp$R2_ScRate,fp$R2_InsRate,fp$R2_SnvRate,fp$R2_TotalRate));"
       //      "ym3 <-max(ym,ym2);"
            "xm <- readlen *2;"
            "plot(NA,xlab=\"\",ylab=\"\",xlim=c(0,xm),xaxp=c(0,xm,xm),ylim = c(0,1),yaxp=c(0,1,100));"
            //"plot(NA,xlab=\"\",ylab=\"\",xlim=c(0,xm),xaxp=c(0,xm,xm),ylim = c(0,ym3),yaxp=c(0,ym,ym));"
            "x1 <- 1:readlen;"
            "x2 <- (readlen+1):xm;"
            "points(x1,fp$R1_TotalRate/100,type=\"b\",pch=20,lty=2,lwd=1,col=\"#6666ff\");"
            "points(x1,fp$R1_UnmRate/100,type = \"b\",pch=3,lty=2,lwd=1,col=\"#66ffff\");"
            "points(x1,fp$R1_ScRate/100,type = \"b\",pch=1,lty=2, lwd=1,col=\"#66ff66\");"
            "points(x1,fp$R1_InsRate/100,type=\"b\",pch=6,lty=2,lwd=1,col=\"#ff66b3\");"
            "points(x1,fp$R1_SnvRate/100,type=\"b\",pch=0,lty=2,lwd=1,col=\"#55b679\");"
            "abline(v=readlen,col=\"red\",lty=2,lwd=2);"
            "par(new=T);"
            "points(x2,fp$R2_TotalRate/100,type=\"b\",pch=20,lty=2,lwd=1,col=\"#6666ff\");"
            "points(x2,fp$R2_UnmRate/100,type = \"b\",pch=3,lty=2,lwd=1,col=\"#66ffff\");"
            "points(x2,fp$R2_ScRate/100,type = \"b\",pch=1,lty=2, lwd=1,col=\"#66ff66\");"
            "points(x2,fp$R2_InsRate/100,type=\"b\",pch=6,lty=2,lwd=1,col=\"#ff66b3\");"
            "points(x2,fp$R2_SnvRate/100,type=\"b\",pch=0,lty=2,lwd=1,col=\"#55b679\");"
            //"axis(4);"
            "title(main=\"Read1(left) / Read2(right) Multi-Mispair Rate \");"
            "legend(1,1,legend = c(\"TotalMispair\",\"Unmapped\",\"SoftClip\",\"Insertion\",\"Mismatch\"),col=c(\"#6666ff\",\"#66ffff\",\"#66ff66\",\"#ff66b3\",\"#55b679\"),lty=c(2,2,2,2,2),lwd=c(1,1,1,1,1),pch=c(20,3,1,6,0),bty = \"n\");"
            "dev.off()\n",
            file_output,file_prefix,file_input,read_length );

    fclose(fp);
    sprintf(z, "Rscript %s", file);
    system(z);
    remove(file);
    free(z);
    free(file);

    return  0 ;
}

//static  void modifyMispair()


int main(int argc, char *argv[]){
    int copt,len,tid = -2;
    char *bam = (char *)ckalloc(4096, sizeof(char));
    char *out_dir = (char *)ckalloc(4096, sizeof(char));
    char *out_pfx = (char *)ckalloc(4096, sizeof(char));
//    char *readLen = (char *)ckalloc(20,sizeof(char));
    int readLen ;
    readLen = 76 ;
    char *ref_file = (char *)ckalloc(4096, sizeof(char));
    char *ref = 0;
    faidx_t *fai ;


    while((copt = getopt(argc, argv, "m:n:r:o:p:h")) != -1)
    {
        switch(copt)
        {
            case 'h':
                return usage(argv[0]);
                break;
            case 'm':
                strcpy(bam, optarg);
                break;
            case 'n':
                readLen = atoi(optarg);
                break;
            case 'r':
                strcpy(ref_file,optarg);
                break;
            case 'o':
                strcpy(out_dir, optarg);
                break;
            case 'p':
                strcpy(out_pfx, optarg);
                break;
            default :
                return usage(argv[0]);
                break;

        }
    }
    if(!*ref_file || (access(ref_file, R_OK) == -1))
    {
        fprintf(stderr, "ERROR: Fail to read reference file!\n");
        return(usage(argv[0]));
    }
    if(!*out_dir)
    {
        fprintf(stderr, "Warning: Set output directroy to './'\n");
        strcpy(out_dir, ".");
    }
    if(access(out_dir,0)!=0 && mkdir(out_dir,0755)!=0)
    {
        fprintf(stderr, "ERROR: Fail to create output directory '%s'\n", out_dir);
        return usage(argv[0]);
    }



    fai = fai_load(ref_file);
    read1_percycle_t misPairInfo_a[2][readLen];
    int  read1_counts = 0 ,read2_counts = 0;
    int i_0 ,i_2;
    for ( i_0 = 0; i_0 < readLen;++i_0){
        for (i_2 =0;i_2 < 2; ++i_2) {
//            misPairInfo_a[i_2][i_0].Counts = 0;
            misPairInfo_a[i_2][i_0].InsertCounts = 0;
            misPairInfo_a[i_2][i_0].SclipCounts = 0;
            misPairInfo_a[i_2][i_0].SnvCounts = 0 ;
            misPairInfo_a[i_2][i_0].UnmCounts = 0 ;
        }

    }


    bam1_t *b;
    b = bam_init1();
    samFile *in=NULL;
    bam_hdr_t *h = NULL;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
    in = sam_open_format(bam, "r", &ga.in);
    h = sam_hdr_read(in);
    int read_len = 0;
    // foreach bam reads start
    int i_4 ;
    while(sam_read1(in, h, b) >= 0) // foreach alignment
    {
        read_len = b->core.l_qseq;
       // filter secondary,supplementary,not equalif ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) ) continue;
        if (read_len != readLen) continue;
        if (b->core.flag &BAM_FSECONDARY) continue;
        if (b->core.flag &BAM_FSUPPLEMENTARY) continue;






        if (b->core.tid >= 0) {
            if (tid != b->core.tid) {
                free(ref);
                ref = fai_fetch(fai, h->target_name[b->core.tid], &len);
                if (ref == 0)
                    fprintf(stderr, " fail to find sequence '%s' in the reference.\n",
                            h->target_name[tid]);
                tid = b->core.tid ;
            }

            if (ref) {
                uint8_t *seq = bam_get_seq(b);
                uint32_t *cigar = bam_get_cigar(b);
                bam1_core_t *c = &b->core;

                int ReadTmp_a[readLen];
                int i_3 ;
                for (i_3 = 0; i_3<readLen;++i_3){
                    ReadTmp_a[i_3] = 0;
                }


                if (c->flag &BAM_FUNMAP) {
//                    printf("UNMAP\n");

                    if (c->flag &BAM_FREAD1) {
//                        printf("UNMAP\tRead1\n");
                        read1_counts += 1 ;
                        for (i_4 =0 ; i_4 <readLen;++i_4) {
                            misPairInfo_a[0][i_4].UnmCounts += 1 ;
                        }

                    } else if (c->flag &BAM_FREAD2){
//                        printf("UNMAP\tRead2\n");
                        read2_counts += 1 ;
                        for (i_4 =0 ; i_4 <readLen;++i_4) {
                            misPairInfo_a[1][i_4].UnmCounts += 1 ;
                        }
                    }

                } else { //Mapped reads start
                    int z;
                    int i, x, y ;
                    int r1_or_r2 = 0 ;
                    int forward_or_reverse = 0;
//                     be sure forward strand or reverse strand ,read1 or read2
                    if (c->flag &BAM_FREAD1) {
                        read1_counts += 1 ;
                        r1_or_r2  = 1 ;
                        if (c->flag &BAM_FREVERSE){
                            forward_or_reverse = 3 ;
                        } else {
                            forward_or_reverse = 4 ;
                        }
                    } else if (c->flag &BAM_FREAD2) {
                        read2_counts += 1 ;
                        r1_or_r2 = 2 ;
                        if (c->flag &BAM_FREVERSE) {
                            forward_or_reverse = 3 ;
                        } else {
                            forward_or_reverse = 4 ;
                        }
                    }


                    z =0 ;

                    for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
                        int j, l = cigar[i]>>4, op = cigar[i]&0xf; //get oplen,op
                        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
                            for (j = 0; j < l; ++j) {
                                int c1, c2 ;
                                z = y + j;
                                if (x+j >= len || ref[x+j] == '\0') break; // out of bounds
                                c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
                                if (!((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0)) { // mismatch
//                                    ReadTmp_a[z] = 1;

                                    if (r1_or_r2 == 1 && forward_or_reverse == 3) {// read1 and reverse
                                        misPairInfo_a[0][readLen -1- z].SnvCounts += 1 ;
                                    } else if ( r1_or_r2 == 1 && forward_or_reverse == 4) { // read1 and forward
                                        misPairInfo_a[0][z].SnvCounts += 1;
                                    } else if ( r1_or_r2 == 2 && forward_or_reverse == 3) {  // read2 and reverse
                                        misPairInfo_a[1][readLen -1- z].SnvCounts += 1 ;
                                    } else if ( r1_or_r2 == 2 && forward_or_reverse == 4) { // read2 and forward
                                        misPairInfo_a[1][z].SnvCounts += 1 ;
                                    }


                                }

                            }
                            if (j < l) break;
                            x += l; y += l;

                        } else if (op == BAM_CDEL) { //DEL
                            for (j = 0; j < l; ++j) {
                                if (x+j >= len || ref[x+j] == '\0') break;
                            }
                            x +=  j;
                            if (j < l) break;

                        } else if (op == BAM_CINS ) {  //INSERTON
                            for (j = 0; j < l; ++j) {
                                z = y + j;
//                                ReadTmp_a[z] = 2;
                                if (r1_or_r2 == 1 && forward_or_reverse == 3) {// read1 and reverse
                                    misPairInfo_a[0][readLen -1- z].InsertCounts += 1 ;
                                } else if ( r1_or_r2 == 1 && forward_or_reverse == 4) { // read1 and forward
                                    misPairInfo_a[0][z].InsertCounts += 1;
                                } else if ( r1_or_r2 == 2 && forward_or_reverse == 3) {  // read2 and reverse
                                    misPairInfo_a[1][readLen -1- z].InsertCounts += 1 ;
                                } else if ( r1_or_r2 == 2 && forward_or_reverse == 4) { // read2 and forward
                                    misPairInfo_a[1][z].InsertCounts += 1 ;
                                }
                            }
                             y += l;

                        } else if (op == BAM_CSOFT_CLIP ) { //SOFT_CLIP
                            for (j = 0; j < l; ++j) {
                                z = y + j;
//                                ReadTmp_a[z] = 3;
                                if (r1_or_r2 == 1 && forward_or_reverse == 3) {// read1 and reverse
                                    misPairInfo_a[0][readLen -1- z].SclipCounts += 1 ;
                                } else if ( r1_or_r2 == 1 && forward_or_reverse == 4) { // read1 and forward
                                    misPairInfo_a[0][z].SclipCounts += 1;
                                } else if ( r1_or_r2 == 2 && forward_or_reverse == 3) {  // read2 and reverse
                                    misPairInfo_a[1][readLen -1- z].SclipCounts += 1 ;
                                } else if ( r1_or_r2 == 2 && forward_or_reverse == 4) { // read2 and forward
                                    misPairInfo_a[1][z].SclipCounts += 1 ;
                                }
                            }
                            y += l;

                        }  else if (op == BAM_CREF_SKIP) {
                            x += l;
                        }
                    }

                } // END Mapped

            }
        } // tid end


    } // foreach bam reads end

    bam_destroy1(b);
    bam_hdr_destroy(h);

    free(ref);
    fai_destroy(fai);
    sam_close(in);
    float unmap_rate,sclip_rate,inset_rate,snv_rate,total_rate;
    int TotalMisPairCounts = 0;
    int read_counts[2] = {read1_counts,read2_counts};
    int ab1,ab2,ab3,ab4;
    char *output_file ;
    FILE *fp;
    output_file = (char *)ckalloc(4096, sizeof(char));
    sprintf(output_file, "%s/%sMultiMisPairCounts.txt", out_dir, fmt_pfx(out_pfx));
    fp = ckopen(output_file, "w");

    fprintf(fp,"%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n",
            "Cycle","R1Counts","R2Counts",
            "R1_UnmCounts","R1_UnmRate","R1_SclipCounts","R1_ScRate","R1_InsertCounts","R1_InsRate","R1_SnvCounts","R1_SnvRate","R1_TotalMisCounts","R1_TotalRate",
            "R2_UnmCounts","R2_UnmRate","R2_SclipCounts","R2_ScRate","R2_InsertCounts","R2_InsRate","R2_SnvCounts","R2_SnvRate","R2_TotalMisCounts","R2_TotalRate");



    for ( i_0 = 0; i_0 < readLen;++i_0) { // Start generate counts result
        fprintf(fp,"%d\t%d\t%d\t",i_0+1,read1_counts,read2_counts);
        for (i_2 = 0; i_2 < 2; ++i_2) {
            ab1 = misPairInfo_a[i_2][i_0].UnmCounts ;
            ab2 = misPairInfo_a[i_2][i_0].SclipCounts ;
            ab3 = misPairInfo_a[i_2][i_0].InsertCounts ;
            ab4 = misPairInfo_a[i_2][i_0].SnvCounts ;
            TotalMisPairCounts = ab1 + ab2 + ab3 + ab4 ;

            if ( read_counts[i_2] == 0) {
                unmap_rate = 0.0; sclip_rate=0.0; inset_rate =0.0;snv_rate=0.0;total_rate = 0.0;
            } else {
                unmap_rate = (float) ab1 / (float) read_counts[i_2] * 100;
                sclip_rate = (float) ab2 / (float) read_counts[i_2] * 100;
                inset_rate = (float) ab3 / (float) read_counts[i_2] * 100;
                snv_rate = (float) ab4 / (float) read_counts[i_2] * 100;
                total_rate = (float) TotalMisPairCounts / (float) read_counts[i_2] * 100 ;
            }

//            printf("%d\t%.2f\t%d\t%.2f\t%d\t%.2f\t%d\t%.2f\t%d\t%.2f\t",ab1,unmap_rate,ab2,sclip_rate,ab3,inset_rate,ab4,snv_rate,TotalMisPairCounts,total_rate);
            fprintf(fp,"%d\t%.2f\t%d\t%.2f\t%d\t%.2f\t%d\t%.2f\t%d\t%.2f\t",ab1,unmap_rate,ab2,sclip_rate,ab3,inset_rate,ab4,snv_rate,TotalMisPairCounts,total_rate);




        }
        fprintf(fp,"\n");
    } // End generate counts result
    fclose(fp);
//view_mispair(char *file_input,int read_length,char *file_output,char * file_prefix )
    if ( read1_counts == 0 && read2_counts == 0) {
        fprintf(stderr,"  Both Read1 and Read2 Counts is Zero");
    } else {
        view_mispair(output_file,readLen,out_dir,out_pfx)  ;

    }
    free(output_file);
    free(out_dir) ;
    free(out_pfx) ;


    return 0;
}
